#!/bin/bash
#
# Description: Cut whole virtual slide into tiles
# 
# The MIT License (MIT)
# Copyright (c) 2014-2016, Bas G.L. Nelissen, UMC Utrecht, the Netherlands.
# 
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
# 
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# 

# Variables
SCRIPTNAME="${0##*/}" # script file name without its directory; safe for paths with spaces
DESCRIPTIONSHORT="Cut whole virtual slide into tiles"
# External programs that must exist in PATH. 'tiffsplit' is listed because
# createTilesOpenslide calls it to extract the requested layer.
DEPENDENCIES=("convert" "identify" "perl" "openslide-write-png" "tiffsplit")
SUPPORTED_FORMATS=("TIFF" "Aperio" "Hamamatsu" "iScan" "iScanHT" "Leica")
DEFAULT_MASKSUFFIX=".mask.png"
DEFAULT_LAYER=3
DEFAULT_HEIGHT=2000 # tile height in pixels
DEFAULT_WIDTH=2000  # tile width in pixels
DEFAULT_OVERLAP=0   # overlap of adjacent tiles in pixels
DEFAULT_OUTPUTDIRSUFFIX=".tiles"
DEFAULT_QSUBARGS="-pe threaded 4 -q veryshort"
MINBYTESIZE=100000  # tissue detection is simple, this is the min size in bytes of the output .png for tissue to be present

# sort arrays: one element per line through 'sort', read back line by line so
# that no element is word-split or glob-expanded on the way
sort_buffer=()
while IFS= read -r el; do
  sort_buffer+=("$el")
done < <(printf '%s\n' "${DEPENDENCIES[@]}" | sort)
DEPENDENCIES=("${sort_buffer[@]}")

sort_buffer=()
while IFS= read -r el; do
  sort_buffer+=("$el")
done < <(printf '%s\n' "${SUPPORTED_FORMATS[@]}" | sort)
SUPPORTED_FORMATS=("${sort_buffer[@]}")
unset sort_buffer el

# Exit and cleanup
set -e # abort on the first unhandled command failure; the EXIT trap below still runs

# Remove the temporary working directory, if one was created.
# TMPSTORAGE is only set once a tiling function has called mktemp, so the
# removal is skipped when the script exits earlier (e.g. --help or a failed check).
cleanup() {
  if [[ -n "${TMPSTORAGE:-}" && -d "$TMPSTORAGE" ]]; then
    rm -rf -- "$TMPSTORAGE"
  fi
}
trap cleanup EXIT

# Errors go to stderr
# Writes one line "<SCRIPTNAME> ERROR: <arguments joined by spaces>" to stderr.
err() {
  local IFS=' ' # join the arguments with single spaces, whatever the caller's IFS is
  printf '%s\n' "${SCRIPTNAME} ERROR: $*" >&2
}

# usage message
# Prints a one-line pointer to --help on stdout.
usage() {
# The option loop can call this function before verbose_debug has been
# defined; under 'set -e' an unguarded call would abort the script with
# "command not found" before anything is printed.
if declare -F verbose_debug >/dev/null; then
  verbose_debug "usage()"
fi
cat <<- EOF
$SCRIPTNAME --help for more information.
EOF
}

# version info
# Prints the toolkit version, the name of this script and the license notice
# on stdout.
version() {
  # Only the second line depends on a variable; the license text is literal.
  printf '%s\n' \
    "slideToolkit 0.1 beta" \
    "(${SCRIPTNAME} is part of the slideToolkit)" \
    ""
  cat << 'LICENSE'
The MIT License (MIT) <http://opensource.org/licenses/MIT>
Copyright (c) 2014-2016, Bas G.L. Nelissen, UMC Utrecht, the Netherlands.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

There is NO WARRANTY, to the extent permitted by law.

Written by Bas G.L. Nelissen | https://github.com/bglnelissen.
LICENSE
}


# help message
# Prints the full help text (options, examples, job-list recipes, dependencies
# and supported formats) on stdout.
helpMessage() {
# --help is handled in the option loop, before verbose_debug has been defined;
# under 'set -e' an unguarded call would abort the script with "command not
# found" instead of showing the help text.
if declare -F verbose_debug >/dev/null; then
  verbose_debug "helpMessage()"
fi
cat <<- EOF
${SCRIPTNAME}: ${DESCRIPTIONSHORT}

usage:
  $SCRIPTNAME [options] -f <filename>

options:
  -f, --file <filename>     virtual slide to create tiles from
  -m, --mask <filename>     black mask for excluding parts of the virtual slide 

  [-l, --layer] <integer>   force specific layer ["$DEFAULT_LAYER"]
  [--height] <pixels>       tile height in pixels ["$DEFAULT_HEIGHT"]
  [--width] <pixels>        tile width in pixels ["$DEFAULT_WIDTH"]
  [--overlap] <pixels>      image overlap of adjacent tiles ["$DEFAULT_OVERLAP"]
  [--keep-empty]            do not delete empty tiles when found
  [--outputsuffix] <text>   output directory suffix
  [--ignore-dependencies]   ignores all dependencies, but gives warning
  [--verbose]               verbose output (for debugging)
  
  --help                    display this help
  --version                 display version and license information

examples:
  $SCRIPTNAME --layer 0 "file.tif"
  $SCRIPTNAME --keep-empty --mask="file.mask.png" --file="file.tif"

print a job list:
  find "\$(pwd)" -iname "*tif" -type f -exec printf " \$(command -v "$SCRIPTNAME")" \; \\
        -exec printf " --mask=" \; \\
        -exec bash -c "printf \"{}\" | sed 's/\.TIF$/.mask.png/g' " \; \\
        -exec echo " --file="{}" " \;

print a job list for 'qsub':
  find "\$(pwd)" -iname "*tif" -type f \\
        -exec printf " qsub $DEFAULT_QSUBARGS" \; \\
        -exec printf " \$(command -v "$SCRIPTNAME")" \; \\
        -exec printf " --mask=\"" \; \\
        -exec bash -c "printf \"{}\" | perl -pe 's/\.TIF$/$DEFAULT_MASKSUFFIX/i' " \; \\
        -exec printf "\"" \; \\
        -exec echo " --file=\""{}"\" " \;

dependencies: ${DEPENDENCIES[@]}
supported formats: ${SUPPORTED_FORMATS[@]}

Cut slides into tiles for further processing. Reasonable tile size is
'$DEFAULT_WIDTH' by '$DEFAULT_HEIGHT' pixels (default). Tiles are stored
in a folder with suffix '$DEFAULT_OUTPUTDIRSUFFIX' at the same location
as the input file. By default all empty tiles are deleted. A
memory-mapped disk file (image-magick .mpc file) is used when a mask is
used. The fast and efficient 'openslide-write-png' is used when no mask
is used. Pipe the 'job list' output in 'parallel' for parallel
processing. For some slides, the virtual slide format is not known. For
these instances you need to run slideInfo to find the correct layer
manually. Use the --layer flag to set the correct layer. The amount
--overlap is only affective on one side, the left tile overlaps the tile
on the right, and the top tile overlaps the tile below.

The slideToolkit and all its tools are released under the terms of the MIT license.
The slideToolkit (C) 2014-2016, Bas G.L. Nelissen, UMC Utrecht, the Netherlands.
Report issues at https://github.com/bglnelissen/slideToolkit/issues.

EOF
}

# Menu
# Option values. All start out empty; the option loop fills in whatever the
# user supplied.
FILE="" MASK="" LAYER=""                           # input slide, mask image, layer index
HEIGHT="" WIDTH="" OVERLAP=""                      # tile geometry in pixels
OUTPUTDIRSUFFIX=""                                 # suffix of the output directory
KEEPEMPTYTILES="" VERBOSE="" IGNOREDEPENDENCIES="" # flags
# illegal option
# Reports an unknown command line option, followed by the usage hint, on
# stderr (like every other error in this script) and terminates the script
# with exit status 1.
# Arguments: $1 - the offending option exactly as typed
illegalOption() {
# usage runs inside a command substitution so that a failure within it cannot
# trip 'set -e' before the message has been printed.
printf '%s\n' "$SCRIPTNAME: illegal option $1" "$(usage)" >&2
exit 1
}
# loop through options
# Options that take a value accept both "--opt value" and "--opt=value".

# Abort with a message when a value option is the last word on the command
# line. Without this check 'shift 2' fails and 'set -e' ends the script
# without any explanation.
# Arguments: $1 - option name, $2 - number of words left (option included)
requireValue() {
  if [[ "$2" -lt 2 ]]; then
    err "option $1 requires an argument"
    exit 1
  fi
}

while :
do
  case "$1" in
    --help | -\?)
      helpMessage
      exit 0 ;;
    --version)
      version
      exit 0 ;;
    -h)
      usage
      exit 0 ;;
    -f | --file)
      requireValue "$1" "$#"
      FILE=$2
      shift 2 ;;
    --file=*)
      FILE=${1#*=}
      shift ;;
    --keep-empty)
      KEEPEMPTYTILES=TRUE
      shift ;;
    -m | --mask)
      requireValue "$1" "$#"
      MASK=$2
      shift 2 ;;
    --mask=*)
      MASK=${1#*=}
      shift ;;
    -l | --layer)
      requireValue "$1" "$#"
      LAYER=$2
      shift 2 ;;
    --layer=*)
      LAYER=${1#*=}
      shift ;;
    --outputsuffix)
      requireValue "$1" "$#"
      OUTPUTDIRSUFFIX=$2
      shift 2 ;;
    --outputsuffix=*)
      OUTPUTDIRSUFFIX=${1#*=}
      shift ;;
    --height=*)
      HEIGHT=${1#*=}
      shift ;;
    --height)
      requireValue "$1" "$#"
      HEIGHT=$2
      shift 2 ;;
    --width=*)
      WIDTH=${1#*=}
      shift ;;
    --width)
      requireValue "$1" "$#"
      WIDTH=$2
      shift 2 ;;
    --overlap=*)
      OVERLAP=${1#*=}
      shift ;;
    --overlap)
      requireValue "$1" "$#"
      OVERLAP=$2
      shift 2 ;;
    --ignore-dependencies)
      IGNOREDEPENDENCIES=TRUE
      shift ;;
    --verbose)
      VERBOSE=TRUE
      shift ;;
    --) # End of all options
      shift
      break ;;
    -*)
      illegalOption "$1" ;; # illegalOption exits the script itself
    *)  # no more options. Stop while loop
      break ;;
  esac
done

# DEFAULTS
# FILE falls back to the first remaining positional argument; MASK has no
# default and is left as given.
FILE="${FILE:-$1}"
# Every other value option falls back to its DEFAULT_* counterpart when it was
# not given (or given empty).
LAYER="${LAYER:-$DEFAULT_LAYER}"
OUTPUTDIRSUFFIX="${OUTPUTDIRSUFFIX:-$DEFAULT_OUTPUTDIRSUFFIX}"
HEIGHT="${HEIGHT:-$DEFAULT_HEIGHT}"
WIDTH="${WIDTH:-$DEFAULT_WIDTH}"
OVERLAP="${OVERLAP:-$DEFAULT_OVERLAP}"

# Flags are normalised to the literal strings "true" / "false". A value counts
# as true when it contains one of: true, TRUE, YES, yes.
case "$KEEPEMPTYTILES" in
  *true* | *TRUE* | *YES* | *yes*) KEEPEMPTYTILES=true ;;
  *) KEEPEMPTYTILES=false ;;
esac
case "$IGNOREDEPENDENCIES" in
  *true* | *TRUE* | *YES* | *yes*) IGNOREDEPENDENCIES=true ;;
  *) IGNOREDEPENDENCIES=false ;;
esac
case "$VERBOSE" in
  *true* | *TRUE* | *YES* | *yes*) VERBOSE=true ;;
  *) VERBOSE=false ;;
esac

# VERBOSE / DEBUG
# Prints a timestamped debug line on stdout when VERBOSE is "true". When it is
# not, nothing is done at all (in particular 'date' is not spawned).
# Arguments: $@ - words of the message, joined by single spaces
# Returns:   0 in both cases, so calls are safe under 'set -e'
verbose_debug() {
  if [[ "$VERBOSE" == "true" ]]; then
    local IFS=' ' # join the message words with spaces, whatever the caller's IFS is
    printf '%s\n' "VERBOSE: $(date +'%Y-%m-%d %H:%M:%S'): $*"
  fi
}

# requirements
# Validates the command line values and derives the path variables used by the
# tiling functions. On the first failed check it prints an error (stderr) and
# the usage hint, then exits with status 1.
# Globals read:    FILE MASK LAYER HEIGHT WIDTH OVERLAP OUTPUTDIRSUFFIX
# Globals written: regexnumeric S INPUTFILENAME OUTPUTDIRNAME INPUTSCANEXT M
#                  FILENAMESCAN SHORTNAME FILENAMEMASK TILESWIDTH TILESHEIGHT
#                  STARTDATE
checkRequirements() {
  verbose_debug "checkRequirements()"
  if ! [[ -f "$FILE" ]]; then
    err "No such file: $FILE"
    usage
    exit 1
  fi
  # the mask is optional, but when given it must exist
  if [[ -n "$MASK" && ! -f "$MASK" ]]; then
    err "No such mask: $MASK"
    usage
    exit 1
  fi

  # all numeric options must be plain non-negative integers
  regexnumeric='^[0-9]+$'
  local check
  for check in "layer:$LAYER" "height:$HEIGHT" "width:$WIDTH" "overlap:$OVERLAP"; do
    # each entry is "<option name>:<value>"; split on the first colon
    if ! [[ "${check#*:}" =~ $regexnumeric ]]; then
      err "Invalid format for --${check%%:*}: ${check#*:}"
      usage
      exit 1
    fi
  done

  # check input files
  # Every expansion is quoted so that paths containing spaces or glob
  # characters are handled as a single word.
  local scandir
  if ! scandir="$(cd -- "$(dirname -- "$FILE")" && pwd)"; then
    err "Cannot resolve directory of: $FILE"
    usage
    exit 1
  fi
  S="${scandir}/$(basename -- "$FILE")" # full path $FILE
  INPUTFILENAME="${S%.*}"             # full path without extension
  OUTPUTDIRNAME="${INPUTFILENAME}${OUTPUTDIRSUFFIX}"
  INPUTSCANEXT="${S##*.}"             # input scan extension only
  M="${MASK}"                         # input mask
  FILENAMESCAN="${S##*/}"
  SHORTNAME="$(basename -- "$INPUTFILENAME")"
  FILENAMEMASK="${M##*/}"
  TILESWIDTH="$WIDTH"
  TILESHEIGHT="$HEIGHT"
  STARTDATE="$(date)"

  if ! [[ -f "$S" ]]; then
    err "No such scan: $S"
    usage
    exit 1
  fi
  # check file extensions, aperio and svs and ndpi is quite similar as tif and should work fine.
  # NOTE(review): the pattern is not anchored, so any extension that merely
  # contains one of these strings is accepted as well.
  if ! [[ "$INPUTSCANEXT" =~ (tif|tiff|TIF|TIFF|svs|SVS|ndpi|NDPI) ]]; then
    err "TIF or similar extension expected. Extension found: $INPUTSCANEXT"
    usage
    exit 1
  fi
}

# Dependencies
# Verifies that every program listed in DEPENDENCIES can be found with
# 'command -v'. Each missing program is reported through err. When at least
# one is missing the script exits with status 1, unless IGNOREDEPENDENCIES is
# "true", in which case only a warning is printed and execution continues.
checkDependencies(){
  local dep
  local missing=0
  # quoted expansion: every array element is checked as one word
  for dep in "${DEPENDENCIES[@]}"; do
    if ! command -v "$dep" >/dev/null; then
      err "Missing dependency: $dep"
      missing=$((missing + 1))
    fi
  done
  # numeric comparison (inside [[ ]] the '>' operator compares strings)
  if (( missing > 0 )); then
    # only warning when --ignore-dependencies is set.
    if [[ "$IGNOREDEPENDENCIES" == "true" ]]; then
      err "Ignoring missing dependencies (${missing}), continue..."
    else
      err "Fix missing dependencies (${missing}), exit."
      usage
      exit 1
    fi
  fi
}

# actual program
# Tiles the slide with ImageMagick; programOutput selects this path when a mask
# file exists.
#   1. Composite the mask onto layer LAYER of the slide, trim, add a 30x30
#      white border and store the result as an .mpc buffer in a temp directory.
#   2. Write a preview scaled to one tile size to
#      "<INPUTFILENAME>.bufferexample.png".
#   3. Crop the buffer into tiles inside OUTPUTDIRNAME. A tile whose PNG is at
#      least MINBYTESIZE bytes is renamed "*.tissue.png"; a smaller one is
#      renamed "*.empty.png" when KEEPEMPTYTILES is "true" and deleted otherwise.
# Globals read:    S M LAYER SHORTNAME INPUTFILENAME OUTPUTDIRNAME TILESWIDTH
#                  TILESHEIGHT OVERLAP MINBYTESIZE KEEPEMPTYTILES
# Globals written: TMPSTORAGE (kept global so the EXIT trap's cleanup can
#                  remove it). All other working variables are local, so the
#                  global FILE is no longer overwritten with tile paths.
createTilesConvert(){
  verbose_debug "createTilesConvert()"
  local buffer dimensionsslide dimensionsbuffer
  local date1 date2 diff
  local width height limit_w limit_h
  local x y w h tile tile_path filename TISSUE
  local TILESWIDTH_OVERLAP TILESHEIGHT_OVERLAP

  TMPSTORAGE="$(mktemp -d -t slide2Tiles_XXXXXXXXXXXX)" # -t relative to $TMPDIR, so it should be HPC safe...
  buffer="${TMPSTORAGE}/${SHORTNAME}.mpc"
  verbose_debug "Virtual slide: $S"
  dimensionsslide="$(identify -format %w "${S}[${LAYER}]")x$(identify -format %h "${S}[${LAYER}]")"
  verbose_debug "Create buffer [$dimensionsslide]; $buffer"
  date1="$(date +'%s')"
  convert "${S}[${LAYER}]" \( "${M}" -fuzz 99% -transparent white -scale "${dimensionsslide}" -negate \) -composite -fuzz 3% -trim +repage -bordercolor white -border 30x30 +repage "${buffer}"

  date2="$(date +'%s')"
  diff="$((date2 - date1))"
  verbose_debug "Done buffer; $((diff / 60)) minutes and $((diff % 60)) seconds"

  date1="$(date +'%s')"
  convert "${buffer}" -scale "${TILESWIDTH}x${TILESHEIGHT}" "${INPUTFILENAME}.bufferexample.png"

  date2="$(date +'%s')"
  diff="$((date2 - date1))"
  verbose_debug "bufferexample.png created; $((diff / 60)) minutes and $((diff % 60)) seconds"

  mkdir -p "$OUTPUTDIRNAME" # create output directory
  # create tiles per row, measure dimensions of newly create image
  # (the buffer path is quoted: SHORTNAME or TMPDIR may contain spaces)
  width="$(identify -format %w "${buffer}")"
  height="$(identify -format %h "${buffer}")"
  dimensionsbuffer="${width}x${height}"
  limit_w="$((width / TILESWIDTH))"
  limit_h="$((height / TILESHEIGHT))"
  # the crop size is the same for every tile, so compute it once
  TILESWIDTH_OVERLAP="$((TILESWIDTH + OVERLAP))"
  TILESHEIGHT_OVERLAP="$((TILESHEIGHT + OVERLAP))"

  verbose_debug "Dimensions [$dimensionsbuffer]; tile size [${TILESWIDTH}x${TILESHEIGHT}], overlap [${OVERLAP}]"
  date1="$(date +'%s')"
  for (( y = 0; y <= limit_h; y++ )); do
    for (( x = 0; x <= limit_w; x++ )); do
      # one tile
      # NOTE(review): the "X" field carries the row counter (y) and the "Y"
      # field the column counter (x); kept as is, since the names are output.
      tile="${SHORTNAME}.X${y}.Y${x}.tile.png"
      w="$((x * TILESWIDTH))"
      h="$((y * TILESHEIGHT))"
      tile_path="${OUTPUTDIRNAME}/${tile}"
      filename="${tile_path%.*}" # full path without extension
      convert "$buffer" -crop "${TILESWIDTH_OVERLAP}x${TILESHEIGHT_OVERLAP}+$w+$h" +repage "$tile_path"
      # find tissue, needs improvement
      if [[ "$(wc -c < "$tile_path")" -ge "$MINBYTESIZE" ]]; then
        # more than MINBYTESIZE bytes... tissue found
        TISSUE="tissue"
        mv -- "$tile_path" "${filename}.${TISSUE}.png"
      else
        # no tissue found
        TISSUE="empty"
        if [[ "$KEEPEMPTYTILES" == "true" ]]; then
          mv -- "$tile_path" "${filename}.${TISSUE}.png"
        else
          rm -f -- "$tile_path"
        fi
      fi
    done
  done
  date2="$(date +'%s')"
  diff="$((date2 - date1))"
  verbose_debug "Done tiles; $((diff / 60)) minutes and $((diff % 60)) seconds"
  rm -rf -- "$TMPSTORAGE"
}

# Tiles the slide with openslide-write-png; programOutput selects this path
# when no mask file exists.
#   1. Copy the slide into a temp directory and split it with tiffsplit
#      (output prefix "tmp").
#   2. Pick the split file that belongs to LAYER through the suffix table
#      aaa..aaz, i.e. layers 0-25.
#   3. Write one PNG per tile into OUTPUTDIRNAME. A tile whose PNG is at least
#      MINBYTESIZE bytes is renamed "*.tissue.png"; a smaller one is renamed
#      "*.empty.png" when KEEPEMPTYTILES is "true" and deleted otherwise.
# Any failing step is reported through err and ends the script with status 1.
# Globals read:    S FILENAMESCAN SHORTNAME LAYER OUTPUTDIRNAME TILESWIDTH
#                  TILESHEIGHT OVERLAP MINBYTESIZE KEEPEMPTYTILES
# Globals written: TMPSTORAGE (kept global so the EXIT trap's cleanup can
#                  remove it). All other working variables are local, so the
#                  global FILE is no longer overwritten with tile paths.
createTilesOpenslide(){
  verbose_debug "createTilesOpenslide()"
  local -a AR_TIFFSPLIT_SUFFIX
  local layer_index suffix TMPS SLIDE
  local width height limit_w limit_h
  local x y w h tile tile_path filename TISSUE
  local TILESWIDTH_OVERLAP TILESHEIGHT_OVERLAP

  AR_TIFFSPLIT_SUFFIX=(aa{a..z}) # this is the standard suffix of tiffsplit
  layer_index="$((10#$LAYER))"   # force base 10 so that e.g. "08" is not read as octal
  suffix="${AR_TIFFSPLIT_SUFFIX[layer_index]:-}"
  if [[ -z "$suffix" ]]; then
    err "Layer $LAYER is out of range, valid layers are 0-$(( ${#AR_TIFFSPLIT_SUFFIX[@]} - 1 ))"
    exit 1
  fi

  TMPSTORAGE="$(mktemp -d -t slide2Tiles_XXXXXXXXXXXX)" # -t relative to $TMPDIR, so it should be HPC safe...
  verbose_debug "Virtual slide: $S"
  verbose_debug "Tempstorage: $TMPSTORAGE"
  verbose_debug "Layer: $LAYER"
  verbose_debug "Suffix array: ${AR_TIFFSPLIT_SUFFIX[*]}"
  verbose_debug "Suffix array item: " "$suffix"
  # extract layer to tmp
  TMPS="${TMPSTORAGE}/${FILENAMESCAN}"
  cp -- "$S" "$TMPS"
  verbose_debug "TMPS: $TMPS"
  # tiffsplit writes into the current directory; run it in a subshell so the
  # script itself does not end up inside a directory that is deleted later
  if ! ( cd "$TMPSTORAGE" && tiffsplit "$TMPS" tmp ); then #tmp is the standard prefix of this file, example: tmpaaf.tif
    err "tiffsplit failed on: $TMPS"
    exit 1
  fi
  SLIDE="${TMPSTORAGE}/tmp${suffix}.tif"
  verbose_debug "Working slide: $SLIDE"
  verbose_debug "List temp dir: $(ls "$TMPSTORAGE")"
  if ! [[ -f "$SLIDE" ]]; then
    err "Layer $LAYER not found in: $S"
    exit 1
  fi
  mkdir -p "$OUTPUTDIRNAME" # create output directory
  verbose_debug "Outputdir: $OUTPUTDIRNAME"
  width="$(identify -format %w "${SLIDE}")"
  height="$(identify -format %h "${SLIDE}")"
  limit_w="$((width / TILESWIDTH))"
  limit_h="$((height / TILESHEIGHT))"
  # the tile size is the same for every tile, so compute it once
  TILESWIDTH_OVERLAP="$((TILESWIDTH + OVERLAP))"
  TILESHEIGHT_OVERLAP="$((TILESHEIGHT + OVERLAP))"

  for (( y = 0; y <= limit_h; y++ )); do
    for (( x = 0; x <= limit_w; x++ )); do
      # one tile
      # NOTE(review): the "X" field carries the row counter (y) and the "Y"
      # field the column counter (x); kept as is, since the names are output.
      tile="${SHORTNAME}.X${y}.Y${x}.tile.png"
      w="$((x * TILESWIDTH))"
      h="$((y * TILESHEIGHT))"
      tile_path="${OUTPUTDIRNAME}/${tile}"
      filename="${tile_path%.*}" # full path without extension

      # openslide-write-png [OPTION...] slide x y layer width height output.png
      verbose_debug "openslide-write-png" "$SLIDE" "$w" "$h" 0 "$TILESWIDTH_OVERLAP" "$TILESHEIGHT_OVERLAP" "$tile_path"
      # Output is discarded to suppress thousands of 'JPEGFixupTagsSubsamplingSec'
      # warnings. The command is tested directly: under 'set -e' a separate
      # '$?' check on the next line would never be reached after a failure.
      if ! openslide-write-png "$SLIDE" "$w" "$h" 0 "$TILESWIDTH_OVERLAP" "$TILESHEIGHT_OVERLAP" "$tile_path" >/dev/null 2>/dev/null; then
        err "openslide-write-png exit error"
        exit 1
      fi

      # find tissue, needs improvement
      if [[ "$(wc -c < "$tile_path")" -ge "$MINBYTESIZE" ]]; then
        # more than MINBYTESIZE bytes... tissue found
        TISSUE="tissue"
        mv -- "$tile_path" "${filename}.${TISSUE}.png"
      else
        # no tissue found
        TISSUE="empty"
        if [[ "$KEEPEMPTYTILES" == "true" ]]; then
          mv -- "$tile_path" "${filename}.${TISSUE}.png"
        else
          rm -f -- "$tile_path"
        fi
      fi
    done
  done
  rm -rf -- "$TMPSTORAGE"
}

# programOutput
# Selects the tiling back end and runs it: createTilesConvert when MASK names
# an existing regular file, createTilesOpenslide in every other case.
programOutput(){
  verbose_debug "programOutput()"
  local backend="createTilesOpenslide"
  if [[ -f "$MASK" ]]; then
    backend="createTilesConvert"
  fi
  "$backend"
}

# Run all checks first: checkRequirements validates the command line values
# and derives the path variables, checkDependencies looks up the external
# programs. Both exit the script with status 1 on failure.
# The calls are deliberately plain, separate statements: chaining them with
# && or || would switch off 'set -e' inside the called functions.
checkRequirements
checkDependencies

# All checks passed: cut the slide into tiles.
programOutput